# Google Fonts via showtext ----
# Lato is registered for numeric labels, Assistant for all other text.
library(showtext)
font_add_google(name = "Lato", family = "Lato")
font_add_google(name = "Assistant", family = "Assistant")
# Switch showtext rendering on automatically for the plots drawn below.
showtext_auto(enable = TRUE)
# Read in all four datasets.
# The raw CSV files all live in the same folder; keep that folder in one
# variable so the script can be pointed at a different location by editing a
# single line instead of four.
data_dir <- "C:/Users/Adam Moffitt/OneDrive - West Chester University of PA/STA553/Week 5"

# Life expectancy, wide format: one row per country (geo), one column per year.
life_expectancy_years <- read_csv(file.path(data_dir, "life_expectancy_years.csv"))
## Rows: 187 Columns: 220
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr   (1): geo
## dbl (219): 1800, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810,...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Income per person, same wide layout.
income_per_person <- read_csv(file.path(data_dir, "income_per_person.csv"))
## Rows: 193 Columns: 220
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr   (1): geo
## dbl (219): 1800, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810,...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Country metadata (name, ISO codes, region, sub-region, ...).
countries_total <- read_csv(file.path(data_dir, "countries_total.csv"))
## Rows: 248 Columns: 11
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (7): name, alpha-2, alpha-3, iso_3166-2, region, sub-region, intermediat...
## dbl (4): country-code, region-code, sub-region-code, intermediate-region-code
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Total population, same wide layout as the first two files.
population <- read_csv(file.path(data_dir, "population_total.csv"))
## Rows: 195 Columns: 220
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr   (1): geo
## dbl (219): 1800, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810,...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Reshape the three wide tables (one column per year) into long format so they
# can be joined on country and year.
#
# pivot_longer() replaces the superseded gather():
#   cols = -geo           -> every column except geo is stacked
#   names_to = "Year"     -> former column names (kept as character strings)
#   values_to = ...       -> the stacked cell values
#   values_drop_na = TRUE -> rows with a missing value are dropped
#                            (the equivalent of gather(na.rm = TRUE))
# Note: the result holds the same rows as gather() produced, but ordered by
# country first rather than by year first.

lifeexpectancylong <- life_expectancy_years %>%
  pivot_longer(
    cols = -geo,
    names_to = "Year",
    values_to = "LifeExp",
    values_drop_na = TRUE
  )

incomeperpersonlong <- income_per_person %>%
  pivot_longer(
    cols = -geo,
    names_to = "Year",
    values_to = "Income",
    values_drop_na = TRUE
  )

populationlong <- population %>%
  pivot_longer(
    cols = -geo,
    names_to = "Year",
    values_to = "Population",
    values_drop_na = TRUE
  )

# Combine the long tables: start from income, attach life expectancy, then
# population, matching on country (geo) and Year. Left joins keep every
# income row even when the other table has no match.
LifeExpIncom <- incomeperpersonlong %>%
  left_join(lifeexpectancylong, by = c("geo", "Year"))
LifeExpIncom2 <- LifeExpIncom %>%
  left_join(populationlong, by = c("geo", "Year"))



## Country-name -> region lookup table taken from countries_total
subregions <- select(countries_total, name, region)

# Add each country's region to build the final data set locally.
LifeExpIncomFinal <- LifeExpIncom2 %>%
  left_join(subregions, by = c("geo" = "name"))

# A saved copy of the final data set is hosted on GitHub. The URL is kept in
# its own variable rather than being stored temporarily in LifeExpIncomFinal.
# The table read from GitHub replaces the locally built one above and is what
# the rest of the script works with.
final_data_url <- "https://raw.githubusercontent.com/APM3030/STA553/main/homework3/LifeExpIncomFinal.csv"
LifeExpIncomFinal <- read_csv(final_data_url)
## New names:
## * `` -> ...1
## * ...1 -> ...2
## Rows: 42486 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): geo, region
## dbl (6): ...1, ...2, Year, Income, LifeExp, Population
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert Year to integer. It drives transition_time() below and is printed in
# the subtitle through {frame_time} (presumably so whole years are shown --
# confirm against the gganimate documentation).
LifeExpIncomFinal <- LifeExpIncomFinal %>%
  mutate(Year = as.integer(Year))

# Animated bubble chart: income (log scale) against life expectancy, one
# bubble per country, bubble size = population, colour = region, one frame
# per year.
ggplot(
  LifeExpIncomFinal,
  aes(Income, LifeExp, size = Population, colour = region)
) +
  geom_point(alpha = 0.5) +
  # Hide the size legend. "none" replaces the deprecated `size = FALSE`.
  guides(size = "none") +
  scale_size(range = c(2, 12)) +
  scale_x_log10(labels = scales::dollar) +
  labs(
    title = "Relationship Between Life Expectancy and GDP",
    subtitle = "Year: {frame_time}",
    x = "GDP Per Capita",
    y = "Life Expectancy in Years",
    colour = "Region:"
  ) +
  theme_minimal() +
  theme(
    axis.line = element_line(size = .7, linetype = "solid"),
    # Assistant for general text, Lato for the numeric axis labels.
    text = element_text(size = 18, family = "Assistant"),
    axis.title.y = element_text(vjust = -2),
    axis.title.x = element_text(vjust = 3),
    plot.title = element_text(face = "bold", size = 26, vjust = -3.5),
    plot.subtitle = element_text(vjust = -1),
    axis.text.x = element_text(family = "Lato", size = 16),
    axis.text.y = element_text(family = "Lato", size = 16),
    legend.title = element_text(face = "bold")
  ) +
  transition_time(Year)

# Subset the data for the year 2015.
leifinal2015 <- LifeExpIncomFinal %>%
  filter(Year == 2015)

# Interactive bubble chart for 2015: income against life expectancy, one
# marker per country, marker size driven by population.
plot_ly(
  data = leifinal2015,
  x = ~Income,    # horizontal axis
  y = ~LifeExp,   # vertical axis
  # Build the complete hover label here. The earlier hovertemplate never
  # referenced %{text}, so the country name was not shown, and it printed
  # %{marker.size} (the scaled marker size) instead of the real population.
  text = ~paste0(
    "<b>", geo, "</b><br>",
    "Life Expectancy: ", LifeExp, "<br>",
    "GDP Per Capita: ", Income, "<br>",
    "Population: ",
    format(Population, big.mark = ",", scientific = FALSE, trim = TRUE)
  ),
  # Show only the label built above (no separate trace-name box).
  hoverinfo = "text",
  # One colour per country. NOTE(review): this asks for far more colours than
  # the default palette offers, which is what triggers the RColorBrewer
  # "n too large" warnings when this chunk runs.
  color = ~factor(geo),
  alpha = 0.5,
  size = ~Population,
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    showlegend = FALSE,
    title = list(
      # plotly fonts have no `face` attribute; bold is applied with <b> tags.
      text = "<b>Relationship Between Life Expectancy and GDP</b>",
      font = list(family = "Arial", size = 18)
    ),
    xaxis = list(title = list(text = "GDP Per Capita")),
    yaxis = list(title = list(text = "Life Expectancy"))
  )
## Warning: Ignoring 6 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors